#=====================preamble===========================

#load necessary packages
library(raster)
library(blockCV)
library(biomod2)
library(ggplot2)
library(ggthemes)
library(snowfall)
library(dismo)
library(ROCR)
library(gbm)
#set base wd (only for sourcing this script)
# this.dir <- dirname(parent.frame(2)$ofile)
# setwd(this.dir)
#=====================loading data=========================

#load species data (the .RData file is expected to provide the data frame `PA`)
load("Data/PA.RData")
#check for NA in PA
any(is.na(PA))
#yes there are NAs, remove every row that contains at least one
PA <- na.omit(PA)
# #load predictor data
# source("Scripts/load_var_stack.R")

#==================preparing data=============================
#define species we are going to model
species <- PA[,37:72]
species <- species[,c(3,6,7,9,10,12,15:18,24,26,28,34)]
sp.names <- c('eucapauc', 'eucaobli', 'eucapilu', 'corymacu')
#pick the species columns BY NAME, in the order of sp.names: everything
#downstream pairs species column s with sp.names[s], whereas a %in% filter
#keeps the column order of PA and could silently mislabel the models
missing.sp <- setdiff(sp.names, colnames(species))
if (length(missing.sp) > 0) {
    stop("species column(s) not found in PA: ",
         paste(missing.sp, collapse = ", "), call. = FALSE)
}
rm(missing.sp)
species <- species[, sp.names, drop = FALSE]
nsp <- ncol(species)
#build covariate data (we do not use raster stacks here because it takes too long to calculate)
cov.names <- c("bc02", "bc04", "bc05", "bc12", "bc14", "bc21", "bc32", "bc33", "rjja", "rsea", "rugg")
covariates <- PA[,which(colnames(PA) %in% cov.names)]
#centre and scale every covariate
covariates <- as.data.frame(base::scale(covariates))

#column layout relied on by the positional indices further down:
#1:2 = PA[,2:3] (used as coordinates), 3:13 = covariates (assuming all 11
#names in cov.names exist in PA), 14 onwards = species in sp.names order
all.sp.data <- cbind(PA[,2:3],covariates,species)
#remove unused variable
rm(species,covariates)

#boundaries of the bands used as spatial blocks; they are compared against
#column 2 of all.sp.data (presumably latitude - confirm against PA)
slice <- c(-38,-36,-34,-32,-30,-28)
#make the blocks
#spBlocks[n, f] is FALSE when row n lies strictly inside band f (held out in
#run f) and TRUE otherwise (available for training in run f).
#The table is sized from the data instead of a hard-coded row count, so it
#stays correct whatever number of rows na.omit() leaves behind.
#NOTE(review): both comparisons are strict, so a row sitting exactly on a
#boundary (or outside -38..-28) is TRUE in every run, i.e. never held out.
block.coord <- all.sp.data[, 2]
n.block <- length(slice) - 1
spBlocks <- as.data.frame(matrix(nrow = nrow(all.sp.data), ncol = n.block))
for (f in seq_len(n.block)) {
    spBlocks[, f] <- !(block.coord > slice[f] & block.coord < slice[f + 1])
}
colnames(spBlocks) <- paste0("RUN", seq_len(n.block))
rm(block.coord, n.block)
# spBlocks <- as.matrix(spBlocks)
#===============formatting data into BIOMOD format====================
#normally the blocks would simply be handed to BIOMOD as a data split table,
#but here one fold at a time is set aside as a truly external test set
fold <- c(1,5)
n.fold <- 2

#row index column, used to map sampled block rows back onto all.sp.data
spBlocks$row.ID <- 1:nrow(spBlocks)

#blocked.data[[s]][[f]] = list(BIOMOD formatted data, CV split table,
#                              thinned training data, external test data)
blocked.data <- vector('list', nsp)
for (s in 1:nsp){
    blocked.data[[s]] <- vector('list', n.fold)
    for (f in fold) {
        #rows outside the held-out fold, randomly thinned to one tenth
        train.rows <- spBlocks[spBlocks[,f],]
        keep <- sample(nrow(train.rows), nrow(train.rows)/10)
        row.ids <- train.rows$row.ID[keep]
        cv.data <- all.sp.data[row.ids,]
        #split table for the remaining runs: drop the held-out fold's own
        #column and the row.ID helper column (column 6)
        cv.table <- as.matrix(spBlocks[row.ids,-c(f,6)])
        #every row of the held-out fold (not thinned) is kept for external testing
        ext.data <- all.sp.data[!spBlocks[,f],]
        blocked.data[[s]][[f]] <- list(
            BIOMOD_FormatingData(
                resp.var = cv.data[,s+13],
                expl.var = cv.data[,3:13],
                resp.xy = cv.data[,1:2],
                resp.name = sp.names[s]),
            cv.table,
            cv.data,
            ext.data)
    }
    rm(train.rows, keep, row.ids, cv.data, cv.table, ext.data)
}

rm(spBlocks,PA, all.sp.data)
#==========================CV models===========================

#get file path to be in a CV subfolder
#recursive = TRUE also creates "Output" when it does not exist yet (a plain
#dir.create() would only warn and the setwd() below would then fail);
#showWarnings = FALSE keeps re-runs quiet when the folder is already there
dir.create(file.path(getwd(), "Output", "CV_parallel_lat_thin"),
           recursive = TRUE, showWarnings = FALSE)
setwd(dir = "Output/CV_parallel_lat_thin/")

#initiate parallel computing
sfInit(parallel=TRUE, cpus=4 ) ##I have 6 cores, using 4 here
## Export packages
sfLibrary('biomod2', character.only=TRUE)
## Export variables
sfExport('blocked.data')
sfExport('sp.names')
sfExport('fold')

#Build all cross-validated biomod2 models for one species.
#
# sp.list : index of the species in sp.names / blocked.data
# returns : list of length 5; the positions listed in `fold` each hold
#           list(block model, block evaluations,
#                random-split model, random-split evaluations,
#                block ensemble, block ensemble evaluations,
#                random-split ensemble, random-split ensemble evaluations)
#
#Reads the globals sp.names, fold and blocked.data, which are exported to the
#snowfall workers before this function is dispatched.
CV_modelling <- function(sp.list){
    sp.idx <- sp.list
    sp.label <- sp.names[sp.idx]
    algorithms <- c('GLM','GBM','GAM','CTA','ANN','FDA','MARS','RF')
    CV_models <- vector('list', 5)
    for (f in fold) {
        fold.data <- blocked.data[[sp.idx]][[f]]
        #block CV: calibration/validation rows come from the supplied split table
        block_model <- BIOMOD_Modeling(data = fold.data[[1]],
                                       models = algorithms,
                                       models.eval.meth = c('ROC'),
                                       DataSplitTable = fold.data[[2]],
                                       VarImport = 1,
                                       modeling.id = paste(sp.label,"block_CV",f,sep="_"))
        block_AUC <- get_evaluations(block_model,as.data.frame = TRUE)
        #random CV: 4 evaluation runs with DataSplit = 75, no full models
        rr_model <- BIOMOD_Modeling(data = fold.data[[1]],
                                    models = algorithms,
                                    models.eval.meth = c('ROC'),
                                    NbRunEval = 4,
                                    DataSplit = 75,
                                    do.full.models = FALSE,
                                    VarImport = 1,
                                    modeling.id = paste(sp.label,"rr_CV",f,sep="_"))
        rr_AUC <- get_evaluations(rr_model,as.data.frame = TRUE)
        #mean and ROC-weighted-mean ensembles, built from each set of CV models
        block_weight_em_CV <- BIOMOD_EnsembleModeling(block_model,
                                                      eval.metric = 'ROC',
                                                      models.eval.meth = c('ROC'),
                                                      prob.mean = TRUE,
                                                      prob.mean.weight = TRUE,
                                                      eval.metric.quality.threshold = NULL,
                                                      VarImport = 1)
        block_em_AUC <- get_evaluations(block_weight_em_CV,as.data.frame = TRUE)
        rr_weight_em_CV <- BIOMOD_EnsembleModeling(rr_model,
                                                   eval.metric = 'ROC',
                                                   models.eval.meth = c('ROC'),
                                                   prob.mean = TRUE,
                                                   prob.mean.weight = TRUE,
                                                   eval.metric.quality.threshold = NULL,
                                                   VarImport = 1)
        rr_em_AUC <- get_evaluations(rr_weight_em_CV,as.data.frame = TRUE)
        CV_models[[f]] <- list(block_model,block_AUC,
                               rr_model,rr_AUC,
                               block_weight_em_CV,block_em_AUC,
                               rr_weight_em_CV,rr_em_AUC)
        #free the large model objects before the next fold
        rm(fold.data,block_model,rr_model,block_AUC,rr_AUC,block_weight_em_CV,block_em_AUC,rr_weight_em_CV,rr_em_AUC)
        gc()
    }
    gc()
    CV_models
}




## Do the run: one task per species (nsp of them, matching blocked.data and
## the 1:nsp loops used when the results are compiled)
CV_output <- sfLapply(seq_len(nsp), CV_modelling)


## stop snowfall
sfStop( nostop=FALSE)


#===================build full models & external evaluations=========================================
#set new directory (a sibling of the CV folder)
setwd("..")
dir.create("full models_lat_thinned")
setwd("full models_lat_thinned/")



#we will build and predict full models in one go - this is mostly because BIOMOD ensembles rely on local modelling objects, so we cannot easily override weights for ensembles (two sets of individual models must be made)

#we will also calculate AUC and log likelihood here, because it is difficult to save biomod predictions in R environment (they are written to local files)

###but first, we define the function for calculating log-likelihood
#Mean Bernoulli log-likelihood of predicted probabilities given 0/1 outcomes.
#
# pred : numeric vector of predicted probabilities
# obs  : numeric vector of observations (0/1), same length as pred
# returns : sum of the per-observation log-likelihoods divided by length(obs)
calc.logl <- function (pred, obs) 
{
    eps <- .Machine$double.eps
    prob <- pred
    #some biomod models output exact 0s or 1s, which would give log(0);
    #move those inwards by machine epsilon
    prob[which(prob == 0)] <- eps
    prob[which(prob == 1)] <- 1 - eps
    #NA predictions (FDA can produce them, possibly beyond the sampled
    #environment) are given the neutral value 0.5
    prob[is.na(prob)] <- 0.5
    ll.presence <- obs * log(prob)
    ll.absence <- (1 - obs) * log(1 - prob)
    sum(ll.presence + ll.absence)/length(obs)
}


#start a fresh snowfall cluster for the full-model evaluations
sfInit(parallel=TRUE, cpus=4 ) ##I have 6 cores, using 4 here
## Export packages
sfLibrary('biomod2', character.only=TRUE)
## Export variables: everything biomod_eval reads from the global environment
sfExport('blocked.data', 'sp.names', "CV_output", "calc.logl", "fold")


#Fit the full biomod2 models for one species and score them, together with
#two CV-weighted ensembles, on the externally held-out fold.
#
# sp.list : index of the species in sp.names / blocked.data / CV_output
# returns : list of length 5; the positions listed in `fold` each hold
#           list(individual model evaluations (ROC),
#                block-weighted ensemble evaluations,
#                random-weighted ensemble evaluations,
#                individual model log-likelihoods,
#                block-weighted ensemble log-likelihoods,
#                random-weighted ensemble log-likelihoods)
#
#Reads the globals sp.names, fold, blocked.data, CV_output and calc.logl,
#which are exported to the snowfall workers beforehand.
biomod_eval <- function(sp.list){
    sp.idx <- sp.list
    sp.label <- sp.names[sp.idx]
    resp.col <- 13+sp.idx
    algorithms <- c('GLM','GBM','GAM','CTA','ANN','FDA','MARS','RF')
    eval_result <- vector('list', 5)
    for (f in fold) {
        fold.data <- blocked.data[[sp.idx]][[f]]
        cv.fits <- CV_output[[sp.idx]][[f]]
        #external (held-out) fold: response + covariates, covariates only, response only
        ext.data <- fold.data[[4]]
        ext.eval <- ext.data[,c(resp.col,3:13)]
        ext.env <- ext.data[,3:13]
        ext.obs <- ext.data[,c(resp.col)]

        #individual models, fitted with biomod2's default data-splitting settings
        full_model <- BIOMOD_Modeling(data = fold.data[[1]],
                                      models = algorithms,
                                      models.eval.meth = c('ROC'),
                                      VarImport = 1,
                                      modeling.id = paste(sp.label,"full_model",f,sep="_"))
        #AUC on the external fold
        full_model_eval <- biomod2::evaluate(full_model,
                                             ext.eval,
                                             stat = c('ROC'))
        #projection onto the external fold, needed for the log-likelihood;
        #on_0_1000 = FALSE keeps the output on the 0-1 scale
        full_model_pred <- BIOMOD_Projection(full_model,
                                             ext.env,
                                             proj.name = paste(sp.label),
                                             on_0_1000 = FALSE)
        full_model_logl <- apply(full_model_pred@proj@val,MARGIN = 2,
                                 FUN = calc.logl, obs = ext.obs)

        #replace the internal evaluation values with the mean block-CV values
        #(presumably so the weighted ensemble takes its weights from block CV)
        full_model@models.evaluation@val[1,1,1:8,1,1] <- rowMeans(cv.fits[[1]]@models.evaluation@val[1,1,1:8,1:4,1])
        block_weight_em_full <- BIOMOD_EnsembleModeling(full_model,
                                                        eval.metric = 'ROC',
                                                        models.eval.meth = c('ROC'),
                                                        prob.mean = TRUE,
                                                        prob.mean.weight = TRUE,
                                                        eval.metric.quality.threshold = NULL,
                                                        VarImport = 1)
        block_weight_em_eval <- biomod2::evaluate(block_weight_em_full,
                                                  ext.eval,
                                                  stat = c('ROC'))
        block_weight_em_pred <- BIOMOD_EnsembleForecasting(block_weight_em_full,
                                                           new.env = ext.env,
                                                           on_0_1000 = FALSE,
                                                           proj.name = paste(sp.label))
        block_model_logl <- apply(block_weight_em_pred@proj@val,MARGIN = 2,
                                  FUN = calc.logl, obs = ext.obs)

        #same again with the mean random-CV values; only the weighted mean is
        #requested here (prob.mean = FALSE)
        full_model@models.evaluation@val[1,1,1:8,1,1] <- rowMeans(cv.fits[[3]]@models.evaluation@val[1,1,1:8,1:4,1])
        rr_weight_em_full <- BIOMOD_EnsembleModeling(full_model,
                                                     eval.metric = 'ROC',
                                                     models.eval.meth = c('ROC'),
                                                     prob.mean = FALSE,
                                                     prob.mean.weight = TRUE,
                                                     eval.metric.quality.threshold = NULL,
                                                     VarImport = 1)
        rr_weight_em_eval <- biomod2::evaluate(rr_weight_em_full,
                                               ext.eval,
                                               stat = c('ROC'))
        rr_weight_em_pred <- BIOMOD_EnsembleForecasting(rr_weight_em_full,
                                                        new.env = ext.env,
                                                        on_0_1000 = FALSE,
                                                        proj.name = paste(sp.label))
        rr_model_logl <- apply(rr_weight_em_pred@proj@val,MARGIN = 2,
                               FUN = calc.logl, obs = ext.obs)

        eval_result[[f]] <- list(full_model_eval,block_weight_em_eval,rr_weight_em_eval,
                                 full_model_logl,block_model_logl,rr_model_logl)
        #free the large objects before the next fold
        rm(fold.data,cv.fits,ext.data,ext.eval,ext.env,ext.obs,
           full_model_eval,block_weight_em_eval,rr_weight_em_eval,full_model,block_weight_em_full,rr_weight_em_full,
           rr_weight_em_pred,block_weight_em_pred,full_model_pred,full_model_logl,block_model_logl,rr_model_logl)
        gc()
    }
    gc()
    eval_result
}



## Do the run: one task per species (nsp of them, matching blocked.data and
## the 1:nsp loops used when the results are compiled)
biomod_eval_output <- sfLapply(seq_len(nsp), biomod_eval)


## stop snowfall
sfStop( nostop=FALSE)
#restore wd
setwd("..")


#===================external BRTs=========================================
#build BRTs for evaluation on external data (use same data as above)

#initiate snowfall
sfInit(parallel=TRUE, cpus=4 ) ##I have 6 cores, using 4 here
## Export packages
sfLibrary('dismo', character.only=TRUE)
## Export variables
sfExport('blocked.data')
sfExport('fold')


#write a function to build BRTs
#
#Fits one boosted regression tree (gbm.step) per fold listed in `fold` for a
#single species, using the thinned training data stored as element [[3]] of
#that species/fold entry in blocked.data.
#
# sp.list : index of the species in blocked.data
# returns : list of length 5 with the fitted models stored at the positions
#           listed in `fold`
#
#Reads the globals blocked.data and fold, which are exported to the snowfall
#workers beforehand.
#NOTE(review): gbm.step may record the literal `data` expression inside the
#fitted object - keep that argument as written unless confirmed otherwise.

BRT_modelling <- function(sp.list){
    nsp <- sp.list
    BRT_models <- vector('list', 5)
    for (f in fold) {
        #covariates are columns 3:13, the response is column 13 + species index
        BRT_models[[f]] <- gbm.step(data = blocked.data[[nsp]][[f]][[3]],
                                    gbm.x = 3:13,
                                    gbm.y = 13+nsp,
                                    tree.complexity = 5,
                                    learning.rate = 0.002, 
                                    n.folds = 4, 
                                    max.trees = 20000,
                                    bag.fraction = 0.75)
        gc()
    }
    gc()
    return(BRT_models)
}

## Do the run: one task per species (nsp of them, matching blocked.data and
## the 1:nsp loops used when the results are compiled)
BRT_output <- sfLapply(seq_len(nsp), BRT_modelling)


## stop snowfall
sfStop( nostop=FALSE)




#dismo model evaluations
#because BRT model objects are really large, for this it is best not to use parallel processing

#write a function to first project then test all BRT models


#Predict every fitted BRT onto its external fold and score the predictions.
#
# sp.list : index of the species in BRT_output / blocked.data
# returns : list of length 5; the positions listed in `fold` each hold
#           list(AUC (the y.values list returned by ROCR), mean log-likelihood)
#
#Reads the globals BRT_output, blocked.data, fold and calc.logl.
BRT_eval <- function(sp.list){
    sp.idx <- sp.list
    resp.col <- 13+sp.idx
    eval_result <- vector('list', 5)
    for (f in fold) {
        brt.fit <- BRT_output[[sp.idx]][[f]]
        ext.data <- blocked.data[[sp.idx]][[f]][[4]]
        ext.obs <- ext.data[,resp.col]
        #predict with the number of trees stored in the fitted object
        pred_f <- predict.gbm(brt.fit,
                              newdata = ext.data[,3:13],
                              n.trees = brt.fit$gbm.call$best.trees,
                              type = "response")
        roc.input <- ROCR::prediction(pred_f,ext.obs)
        AUC_f <- ROCR::performance(roc.input, measure = "auc")@y.values
        logl_f <- calc.logl(pred_f,ext.obs)
        eval_result[[f]] <- list(AUC_f,logl_f)
        gc()
    }
    gc()
    eval_result
}




## Do the run (sequentially): one call per species, nsp of them
BRT_eval_output <- lapply(seq_len(nsp), BRT_eval)


#========================compiling AUC========================================
#first of all we want to know which models performed best on CV and how well they perform on external
#find best performing model

#first define a vector of model names
#(same order as the biomod2 model list; its 'GBM' is labelled "BRT" here)
ind.mod.names <- c("GLM","BRT","GAM","CTA","ANN","FDA","MARS","RF")

#extract best models for each run
#for every species, collect the name(s) of the model(s) with the highest mean
#CV score per fold; ties contribute every tied name
best.block.model <- best.random.model <- vector("list",length = nsp)
for (s in 1:nsp){
    for (f in fold){
        #mean score of each of the 8 models over the 4 CV runs
        block.cv <- rowMeans(CV_output[[s]][[f]][[1]]@models.evaluation@val[1,1,1:8,1:4,1])
        random.cv <- rowMeans(CV_output[[s]][[f]][[3]]@models.evaluation@val[1,1,1:8,1:4,1])
        best.block.model[[s]] <- c(best.block.model[[s]],ind.mod.names[block.cv == max(block.cv)])
        best.random.model[[s]] <- c(best.random.model[[s]],ind.mod.names[random.cv == max(random.cv)])
    }
}
rm(block.cv, random.cv)
#this is just for our reference, we will not need to use this in the next stage


###make a big empty data frame to contain all AUC values
#14 rows per species/fold: 8 individual models, the 2 'best' CV picks,
#Mean, blockWA, randomWA and dismoBRT
no.model.per.run <- 14 #remember to include the 'best' CV models
AUC.all.lat <- data.frame(speciesName = I(rep(sp.names,each = n.fold*no.model.per.run)),
                          Fold = rep(fold,each = no.model.per.run, times = nsp),
                          Model = rep(c(ind.mod.names,"block best","random best","Mean","blockWA","randomWA","dismoBRT"),n.fold*nsp),
                          AUC = rep(NA,n.fold*nsp*no.model.per.run)
)

#fill in AUC results
for (s in 1:nsp){
    for (f in fold){
        #external AUC of the 8 individual models: each evaluation is 4 numbers
        #per model, the first of which is the score
        ext.auc <- matrix(unlist(biomod_eval_output[[s]][[f]][[1]]),ncol = 4,byrow = TRUE)[,1]
        #mean CV score per model, used to pick the 'best' model of each CV scheme
        block.cv <- rowMeans(CV_output[[s]][[f]][[1]]@models.evaluation@val[1,1,1:8,1:4,1])
        random.cv <- rowMeans(CV_output[[s]][[f]][[3]]@models.evaluation@val[1,1,1:8,1:4,1])
        target.rows <- which(AUC.all.lat$speciesName == sp.names[s] & AUC.all.lat$Fold == f)
        AUC.all.lat$AUC[target.rows] <- as.numeric(c(
            ext.auc,
            ext.auc[block.cv == max(block.cv)][1],#select the first model in case of ties
            ext.auc[random.cv == max(random.cv)][1],#select the first model in case of ties
            matrix(unlist(biomod_eval_output[[s]][[f]][[2]]),ncol = 4,byrow = TRUE)[,1],
            matrix(unlist(biomod_eval_output[[s]][[f]][[3]]),ncol = 4,byrow = TRUE)[,1],
            BRT_eval_output[[s]][[f]][[1]]))
    }
}
rm(ext.auc, block.cv, random.cv, target.rows)
#tidy up structure
AUC.all.lat$AUC <- unlist(AUC.all.lat$AUC)
AUC.all.lat$speciesName <- as.character(AUC.all.lat$speciesName)
AUC.all.lat$Fold <- as.character(AUC.all.lat$Fold)
AUC.all.lat$Model <- as.character(AUC.all.lat$Model)


#do a quick boxplot to visualise AUC

AUC.plot <- ggplot(data = AUC.all.lat, aes(x = Model, y = AUC)) + 
    geom_boxplot(width = 0.3) + theme_light(base_size = 9)

#saved under its own name: the combined CV + external plot is written to
#"AUC_lat thinned.pdf" later on and would otherwise overwrite this one
ggsave(AUC.plot,filename = "AUC_lat thinned - external only.pdf")
# 
# #looks good so far, save a copy of result df
# save(AUC.all.lat,file = "AUCresult - lat.RData")

#=============================now we also include the CV results, to compare how their narrative of comparative model performance differs==================================#

#14 model slots per evaluation type (this count includes the 2 'best' CV models)
no.model.per.run <- 14
AUC.CV <- data.frame(speciesName = I(rep(sp.names,each = n.fold*2*no.model.per.run)),#times two because block and random CV
                     Fold = rep(fold,each = no.model.per.run*2, times = nsp),
                     Model = rep(c(ind.mod.names,"block best","random best","Mean","blockWA","randomWA","dismoBRT"),n.fold*nsp*2),
                     AUC = rep(NA,n.fold*nsp*no.model.per.run*2),
                     Eval = rep(c("block","random"),each = no.model.per.run, times = nsp*n.fold)
)

#fill in AUC results: for every species/fold, 14 block-CV values followed by
#14 random-CV values, in the order of the Model column
for (s in 1:nsp){
    for (f in fold){
        cv.fits <- CV_output[[s]][[f]]
        #mean score of each of the 8 models over the 4 CV runs
        block.cv <- rowMeans(cv.fits[[1]]@models.evaluation@val[1,1,1:8,1:4,1])
        random.cv <- rowMeans(cv.fits[[3]]@models.evaluation@val[1,1,1:8,1:4,1])
        #which model(s) each CV scheme ranks first
        is.block.best <- block.cv == max(block.cv)
        is.random.best <- random.cv == max(random.cv)
        #ensemble evaluation tables: odd rows = mean, even rows = weighted mean
        block.em <- cv.fits[[6]]
        random.em <- cv.fits[[8]]
        target.rows <- which(AUC.CV$speciesName == sp.names[s] & AUC.CV$Fold == f)
        AUC.CV$AUC[target.rows] <- as.numeric(c(
            #---- scored on block CV ----
            block.cv,
            block.cv[is.block.best][1],#select the first model in case of ties
            block.cv[is.random.best][1],#select the first model in case of ties
            mean(block.em[c(1,3,5,7),3]),#the mean ensemble on block CV
            mean(block.em[c(2,4,6,8),3]),#the block weight ensemble on block CV
            NA,NA,#random weight ensemble and dismo BRT are not run on block CV
            #---- scored on random CV ----
            random.cv,
            random.cv[is.block.best][1],#select the first model in case of ties
            random.cv[is.random.best][1],#select the first model in case of ties
            mean(random.em[c(1,3,5,7),3]),#the mean ensemble on random CV
            NA,#block weight ensemble is not run on random CV
            mean(random.em[c(2,4,6,8),3]),#the random weight ensemble on random CV
            NA#dismo BRT is not run on random CV
        ))
    }
}
rm(cv.fits, block.cv, random.cv, is.block.best, is.random.best, block.em, random.em, target.rows)
#tidy up structure
AUC.CV$AUC <- unlist(AUC.CV$AUC)
AUC.CV$speciesName <- as.character(AUC.CV$speciesName)
AUC.CV$Fold <- as.character(AUC.CV$Fold)
AUC.CV$Model <- as.character(AUC.CV$Model)
AUC.CV$Eval <- as.character(AUC.CV$Eval)

#===================================combine external and CV results=========================
#label the external rows, then stack them underneath the CV rows
AUC.all.lat$Eval <- rep("external", times = nrow(AUC.all.lat))
AUC.both <- rbind(AUC.CV, AUC.all.lat)
#species and fold use the default factor levels; Model and Eval keep their
#order of first appearance so the plot axis and legend follow the table order
for (fac.col in c("speciesName", "Fold")) {
    AUC.both[[fac.col]] <- as.factor(AUC.both[[fac.col]])
}
for (fac.col in c("Model", "Eval")) {
    AUC.both[[fac.col]] <- factor(AUC.both[[fac.col]], levels = unique(AUC.both[[fac.col]]))
}
rm(fac.col)

#we can also remove NA rows (model/evaluation combinations that were not run)
AUC.both <- na.omit(AUC.both)
#check plot: one box per model and evaluation type, a solid line after the
#tenth model and dotted separators between all models
AUC_plot_lat <- ggplot(data = AUC.both, aes(x = Model, y = AUC, fill = Eval)) +
    geom_boxplot() +
    theme_light(base_size = 14) +
    geom_vline(xintercept = 10.5) +
    scale_fill_manual(values = c('#3A89C9', '#B4DDF7','#ED875E')) +
    geom_vline(linetype = "dotted", xintercept = seq(from = 1.5, to = 13.5, by = 1))

ggsave(filename = "AUC_lat thinned.pdf", plot = AUC_plot_lat,
       width = 14, height = 8.7, units = "in", useDingbats = FALSE)



#also save a copy of combined AUC table
AUC.lat.thinned <- AUC.both
save(AUC.lat.thinned, file = "AUCresult - lat thinned.RData")

##########################compile logl#############################

###make a big empty data frame to contain all logl values
#14 rows per species/fold (this count includes the 2 'best' CV models)
no.model.per.run <- 14
logl.all <- data.frame(speciesName = I(rep(sp.names,each = n.fold*no.model.per.run)),
                       Fold = rep(fold,each = no.model.per.run, times = nsp),
                       Model = rep(c(ind.mod.names,"block best","random best","Mean","blockWA","randomWA","dismoBRT"),n.fold*nsp),
                       logl = rep(NA,n.fold*nsp*no.model.per.run)
)

#fill in logl results
for (s in 1:nsp){
    for (f in fold){
        ext.scores <- biomod_eval_output[[s]][[f]]
        #external log-likelihood of the 8 individual models
        ind.logl <- ext.scores[[4]]
        #mean CV score per model, used to pick the 'best' model of each CV scheme
        block.cv <- rowMeans(CV_output[[s]][[f]][[1]]@models.evaluation@val[1,1,1:8,1:4,1])
        random.cv <- rowMeans(CV_output[[s]][[f]][[3]]@models.evaluation@val[1,1,1:8,1:4,1])
        target.rows <- which(logl.all$speciesName == sp.names[s] & logl.all$Fold == f)
        logl.all$logl[target.rows] <- as.numeric(c(
            ind.logl,
            ind.logl[block.cv == max(block.cv)][1],#select the first model in case of ties
            ind.logl[random.cv == max(random.cv)][1],#select the first model in case of ties
            ext.scores[[5]],#block-weighted ensembles
            ext.scores[[6]],#random-weighted ensemble
            BRT_eval_output[[s]][[f]][[2]]))
    }
}
rm(ext.scores, ind.logl, block.cv, random.cv, target.rows)
#tidy up structure
logl.all$speciesName <- as.factor(logl.all$speciesName)
logl.all$Fold <- as.factor(logl.all$Fold)
logl.all$Model <- factor(logl.all$Model, levels = unique(logl.all$Model))

#check plot: one box per model, a solid line after the tenth model and dotted
#separators between all models
logl_plot_lat <- ggplot(data = logl.all, aes(x = Model, y = logl)) +
    geom_boxplot() +
    theme_light(base_size = 14) +
    geom_vline(xintercept = 10.5) +
    labs(y = "log likelihood") +
    geom_vline(linetype = "dotted", xintercept = seq(from = 1.5, to = 13.5, by = 1))

ggsave(filename = "logl_lat_thinned.pdf", plot = logl_plot_lat,
       width = 14, height = 8.7, units = "in", useDingbats = FALSE)


logl.lat.thinned <- logl.all
save(logl.lat.thinned,file = "loglresult - lat - thinned.RData")


#keep a copy of the whole workspace
save.image("lat full workspace - thinned.RData")
